library(ggplot2)
# Make the built-in iris data set available in the workspace.
data(iris)

# Base-graphics overview: all-pairs plot, one scatterplot, a histogram,
# and boxplots of the data frame's columns.
plot(iris)
plot(iris$Sepal.Length, iris$Sepal.Width)
hist(iris$Sepal.Length)
boxplot(iris)

## Basic plot with qplot(), passing the two vectors directly.
## NOTE: qplot() is deprecated as of ggplot2 3.4.0; it still works but warns.
qplot(x = iris$Sepal.Length, y = iris$Sepal.Width)

# Column names can also be resolved inside a data frame given via `data`.
qplot(data = iris, x = Sepal.Length, y = Sepal.Width)

# The plot type is selected with the `geom` argument.
qplot(data = iris, x = Sepal.Length, y = Sepal.Width, geom = "point")
qplot(data = iris, x = Sepal.Length, y = Sepal.Width, geom = "line")

# Compare with the base-graphics line plot.
plot(iris$Sepal.Length, iris$Sepal.Width, type = "l")

# Lines and points drawn together in a single call.
qplot(
  data = iris, x = Sepal.Length, y = Sepal.Width,
  geom = c("line", "point")
)
# We can combine plot types.
# Boxplot of sepal length per species with the observations drawn as points.
qplot(
  data = iris, x = Species, y = Sepal.Length,
  geom = c("boxplot", "point")
)
# Same call with a jitter layer added as a third geom.
qplot(
  data = iris, x = Species, y = Sepal.Length,
  geom = c("boxplot", "point", "jitter")
)
# In ggplot2, additional variables can be mapped to plot aesthetics,
# including color, fill, shape, size, alpha, and linetype.
# Boxplot plus jitter, with colour mapped to Sepal.Width.
qplot(
  data = iris, x = Species, y = Sepal.Length,
  geom = c("boxplot", "jitter"),
  color = Sepal.Width
)
# Points plus smoother: colour mapped to Species, size to Petal.Width.
qplot(
  data = iris, x = Sepal.Width, y = Sepal.Length,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width
)
# Sometimes we want to look at the conditional distribution of a variable,
# i.e. visualize some characteristic of a dataset conditional on the levels
# of some other variable. For this we use the `facets` argument.
# Facet by columns:
# One panel per species, requested with the one-sided formula ~Species.
qplot(
  data = iris, x = Sepal.Width, y = Sepal.Length,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width,
  facets = ~Species
)
# Facet by rows:
# Species on the left-hand side of the formula puts it on the rows.
qplot(
  data = iris, x = Sepal.Width, y = Sepal.Length,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width,
  facets = Species ~ .
)
# Using our long-format data frame, we will further explore the iris dataset.
# Read the long-format iris data from an RDS file in the working directory.
# The plots below use its columns Width, Length, Species and flower_part.
iris_long <- readRDS("iris_long.rds")

## "Very" simple plot: data and aesthetics only, no layer yet
ggplot(data = iris_long, mapping = aes(x = Width, y = Length))

## Add a point layer so the data show up
ggplot(data = iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point()

## Split into panels by Species and flower_part, each with free axis scales
ggplot(data = iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point() +
  facet_wrap(Species ~ flower_part, scales = "free")

## Components are chained with `+` at the end of each line
# Add a linear regression line to every panel
ggplot(data = iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point() +
  facet_wrap(Species ~ flower_part, scales = "free") +
  geom_smooth(method = "lm")

# The later plots refer to the same data under the name iris_cast.
iris_cast <- iris_long
# Let's add the options `shape` and `color` to the `aes()` call.
# Keep the plot in a variable: shape and colour both follow flower_part,
# panels are split by Species, and a linear fit is added.
my_plot <- ggplot(
  data = iris_cast,
  mapping = aes(
    x = Width, y = Length,
    shape = flower_part, color = flower_part
  )
) +
  geom_point() +
  facet_grid(~Species) +
  geom_smooth(method = "lm")
my_plot

# The stored ggplot object can be extended with further components,
# for example a black-and-white theme with a larger base font size.
my_plot +
  theme_bw(base_size = 24)
# Using facet_wrap() instead of facet_grid():
# Shape and colour now follow Species, with one panel per flower_part.
ggplot(
  data = iris_cast,
  mapping = aes(x = Width, y = Length, shape = Species, color = Species)
) +
  geom_point() +
  facet_wrap(~flower_part) +
  geom_smooth(method = "lm")

## Saving your plot
# When `plot` is not supplied, ggsave() writes last_plot(); the default is
# spelled out here so it is clear which plot ends up in the file.
ggsave(filename = "myplot.png", plot = last_plot(), width = 5, height = 5)
# Exploring some themes:
# ggthemes provides the theme_excel() and theme_wsj() themes used below.
library(ggthemes)

# Re-draw the stored plot with each theme, setting the base font size.
my_plot +
  theme_excel(base_size = 24)
my_plot +
  theme_wsj(base_size = 18)
# Let's try a larger dataset.
#### First some basic plots ####
# Price against carat, on the raw and on the log10 scale.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point()

## Now we'll change the look of the plot ##
# A constant colour is set outside aes(), so it applies to every point.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = "blue")

## What can you see from this plot?
## Let's make the points more visible: transparency, hollow shape, small size.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 1 / 20)
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(shape = 1)
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(size = 0.1)
## Now let's add a smoother, and look at the relationship by cut.
## With `method` left at its default, geom_smooth() chooses the smoother from
## the data size: loess for fewer than 1,000 observations, a GAM otherwise.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point() +
  geom_smooth()

# Colour mapped at the plot level: points and smoother are both split by cut.
ggplot(
  data = diamonds,
  mapping = aes(x = log10(carat), y = log10(price), color = cut)
) +
  geom_point(alpha = 1 / 10) +
  geom_smooth()

# Colour mapped only in the point layer: a single smoother for all cuts.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point(mapping = aes(color = cut), alpha = 1 / 10) +
  geom_smooth()

### Now let's see the linear model.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point() +
  geom_smooth(method = "lm")

# Fit the same log-log model explicitly and keep its residuals as a column.
mod <- lm(log10(price) ~ log10(carat), data = diamonds)
diamonds[["resid"]] <- residuals(mod)

# Colour each point by its residual from the fitted line.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point(mapping = aes(colour = resid)) +
  geom_smooth(method = "lm")
######## Boxplots #######
# Boxplots of log10(price) for each cut. `cut` is mapped to x at the plot
# level so that the x axis is labelled with the variable actually shown
# (overriding x inside the layer left the axis labelled log10(carat)).
ggplot(diamonds, aes(cut, log10(price))) +
  geom_boxplot()

### A 'violin' plot
# One violin per 0.1-wide bin of log10(carat); scale = "width" gives all
# violins the same maximum width.
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_violin(
    aes(group = plyr::round_any(log10(carat), 0.1)),
    scale = "width"
  )
### Plain old histogram of depth, with the x axis limited to 56-67
ggplot(diamonds, aes(depth)) +
  geom_histogram(binwidth = 0.2) +
  xlim(56, 67)

### Layered histogram by cut:
ggplot(diamonds, aes(depth, fill = cut)) +
  geom_histogram(binwidth = 0.2) +
  xlim(56, 67)

### Histograms separated by cut
ggplot(diamonds, aes(depth)) +
  geom_histogram(binwidth = 0.2) +
  facet_wrap(~cut) +
  xlim(56, 67)

## Same but with different scales.
# binwidth is kept at 0.2 so this really is the same histogram as above and
# only the facet scales change (it also avoids the default-bins message).
# NOTE(review): xlim() still fixes the x range, so presumably only the
# y axes end up differing between panels - confirm.
ggplot(diamonds, aes(depth)) +
  geom_histogram(binwidth = 0.2) +
  facet_wrap(~cut, scales = "free") +
  xlim(56, 67)

### Similar to the layered histogram, but with lines
ggplot(diamonds, aes(depth, color = cut)) +
  geom_freqpoly(binwidth = 0.2) +
  xlim(56, 67)

# Same lines with the computed density on the y axis instead of the count.
# after_stat() replaces the deprecated `..density..` notation.
ggplot(diamonds, aes(depth, color = cut)) +
  geom_freqpoly(aes(y = after_stat(density)), binwidth = 0.2) +
  xlim(56, 67)
### Some plots of the price.
# Violin of log10(price) for each cut.
ggplot(data = diamonds, mapping = aes(x = cut, y = log10(price))) +
  geom_violin()

# Density curve of log10(price), one coloured line per cut.
ggplot(data = diamonds, mapping = aes(x = log10(price))) +
  geom_density(mapping = aes(color = cut))
# Some heat maps to give some information on the joint distribution
## Standard: semi-transparent points
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_point(alpha = 1 / 10)

## Colored heat map: 2D bins
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d()

## Contoured heat map: 2D density contours
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_density2d()
# Some comparisons of the joint distribution of carat and price by color.
## Standard: a single panel with points coloured by the `color` column,
## so it shows the same grouping variable as the faceted plots below
## (the colour aesthetic was previously mapped to `clarity`).
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_point(aes(color = color))

## Using a facet: one panel per level of `color`
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_point() +
  facet_wrap(~color)

## Using a facet and a heat map.
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_bin2d() +
  facet_wrap(~color)
## Adding a reference line to the above can be useful.
# Fit the log-log model once and reuse it for both the printed coefficients
# and the reference line (it used to be fitted twice in a row).
mod <- lm(log10(price) ~ log10(carat), data = diamonds)
mod_coef <- coef(mod)
mod_coef
## (Intercept) log10(carat)
##    3.669207     1.675817

# Draw the fitted line in white on top of every facet's heat map.
# Coefficients are picked by name rather than by position.
ggplot(diamonds, aes(log10(carat), log10(price))) +
  geom_bin2d() +
  geom_abline(
    intercept = mod_coef[["(Intercept)"]],
    slope = mod_coef[["log10(carat)"]],
    colour = "white"
  ) +
  facet_wrap(~color)